#!/bin/python3

import argparse
import os.path
from xml.etree.ElementTree import parse
from thefuzz import process

# Baseline dependencies that the compiler adds automatically; this tuple is used to filter them out
basic_so = ("libpthread.so", "libc++.so", "libz.so", "libdl.so", "libc.so", "libm.so")

# Settings for the "top" list of repositories by redundancy count that is printed at the end
top_reverse = True  # whether to sort in reverse (descending) order
top_count = 20  # how many top entries to show

# Repositories that are excluded from matching
removed_repo = ["applications/sample/camera/communication"]
path_filter = ("test",)  # targets whose path contains any item of path_filter are not counted


def get_args():
    """
    Parse the command-line options of this script.

    :return: the argparse namespace with the attributes ``manifest``, ``badlog`` and ``goodlog``
    """
    parser = argparse.ArgumentParser(add_help=True)
    # One row per option: (short flag, long flag, default value, help text).
    option_table = (
        ("-m", "--manifest", "./manifest.xml", "path of manifest.xml"),
        ("-b", "--badlog", "./bad.log", "path of bad.log"),
        ("-g", "--goodlog", "./good.log", "path of good.log"),
    )
    for short_flag, long_flag, default_value, help_text in option_table:
        parser.add_argument(short_flag, long_flag, default=default_value, type=str, help=help_text)
    return parser.parse_args()


def get_target_dict(log: str, ignored_so=None):
    """
    Parse targets and the shared objects they depend on from a log file.

    A line starting with "." is taken as a target path and opens a new (empty) entry.
    Any other line containing "[" is taken as a dependency of the most recent target;
    the dependency name is the text after the first "[" with trailing "]" removed.

    :param log: path of the log file
    :param ignored_so: dependency names to leave out of the result; defaults to the
        module-level ``basic_so``
    :return: dict mapping each target path to the set of its non-ignored dependencies
    """
    if ignored_so is None:
        ignored_so = basic_so
    target_so_dict = dict()  # target_path: dependency_set
    current_target = None
    with open(log, 'r', encoding='utf-8') as f:
        for line in f:
            if line.startswith("."):
                current_target = line.strip()
                target_so_dict[current_target] = set()
                continue
            if '[' not in line or current_target is None:
                # Either not a dependency line, or a dependency line that shows up before
                # any target line: there is no target to attach it to, so skip it.
                continue
            dependency = line.strip().split("[")[1].rstrip("]")
            if dependency not in ignored_so:
                target_so_dict[current_target].add(dependency)
    return target_so_dict


def filter(black_tuple: tuple, target: str) -> bool:
    """
    Tell whether ``target`` passes the blacklist.

    :param black_tuple: substrings that disqualify a target
    :param target: the string to check
    :return: False if any item of ``black_tuple`` occurs in ``target``, True otherwise
    """
    return not any(blocked in target for blocked in black_tuple)


def find_redundant(badlog: str, goodlog: str):
    """
    Work out the redundant dependencies of each target by comparing badlog with goodlog.

    A target is considered only if it appears in both logs and its path passes
    ``path_filter``; its redundant dependencies are those listed in badlog but not in goodlog.

    :param badlog: log of a build made without as-needed, generated with `find . -name "*" -type f -printf "%p\n" | xargs -I param sh -c "echo param; readelf -d param | grep NEEDED" > bad.log`
    :param goodlog: log of a build made with as-needed, generated with `find . -name "*" -type f -printf "%p\n" | xargs -I param sh -c "echo param; readelf -d param | grep NEEDED" > good.log`
    :return: dict mapping a target path to the set of its redundant dependencies
    """
    bad_deps_by_target = get_target_dict(badlog)
    good_deps_by_target = get_target_dict(goodlog)
    redundant_dict = {}
    for target_path, bad_deps in bad_deps_by_target.items():
        if target_path not in good_deps_by_target:
            continue
        if not filter(path_filter, target_path):
            continue
        extra_deps = bad_deps - good_deps_by_target[target_path]
        if extra_deps:
            redundant_dict[target_path] = extra_deps
    return redundant_dict


def find_target_repository_from_manifest(manifest: str, excluded_repos=None):
    """
    Collect the paths of all sub-repositories listed in a manifest XML file.

    :param manifest: path of the manifest.xml file generated by `repo manifest > manifest.xml`
    :param excluded_repos: repository paths to leave out of the result; defaults to the
        module-level ``removed_repo``
    :return: list of sub-repository paths, in document order
    """
    if excluded_repos is None:
        excluded_repos = removed_repo
    with open(manifest, 'r', encoding='utf-8') as f:
        root = parse(f).getroot()
    # Only direct children of the root that carry a "path" attribute are collected.
    paths = (child.get("path") for child in root)
    # Filter rather than list.remove(): an excluded path that is absent from the
    # manifest must not abort the run with ValueError.
    return [path for path in paths if path is not None and path not in excluded_repos]


def get_target_repository_path(redundant_dict: dict, repository_list: list):
    """
    Build the dict that maps each target to the sub-repository it belongs to.

    Note: there is no exact rule linking a target's output path to a repository path, so
    the directory part of the target path is fuzzy-matched against the repository paths.
    A small number of targets may therefore be assigned to the wrong repository.

    :param redundant_dict: redundant dependencies per target, {full target path: redundant dependencies}
    :param repository_list: list of sub-repository paths
    :return: dict mapping each target to its best-matching repository; a target for which
        the matcher returns no candidate is left out
    """
    target_repo_dict = dict()
    for target in redundant_dict:
        # Directory part of the target: drop the first path component and the file name.
        path_tail = "/".join(target.split("/")[1:-1])
        best_match = process.extractOne(path_tail, repository_list)
        if best_match is None:
            # extractOne found nothing to return (e.g. no repositories to choose from).
            continue
        target_repo_dict[target] = best_match[0]
    return target_repo_dict


def reverse_dict(data: dict):
    """
    Invert a dict, grouping the original keys that share a value.

    :param data: the dict to invert
    :return: a new dict mapping each original value to the list of keys that had it,
        in the iteration order of ``data``
    """
    grouped = {}
    for key, value in data.items():
        grouped.setdefault(value, []).append(key)
    return grouped


def reverse_dict_simple(data: dict) -> dict:
    """
    Invert a dict one-to-one.

    :param data: the dict to invert
    :return: a new dict mapping each value of ``data`` to its key; when several keys share
        a value, the key that comes last in iteration order wins
    """
    # Iterate over items(); iterating the dict itself yields only the keys.
    return {value: key for key, value in data.items()}


def top(data: dict, count: int) -> None:
    """
    Print the first ``count`` entries of ``data`` ordered by key and save them to a file.

    Keys are sorted, in descending order when the module-level ``top_reverse`` is true.
    Each entry is emitted as "value:key". The same lines are written to "top<count>.txt"
    in the current directory. When ``data`` has fewer than ``count`` entries, all of them
    are emitted.

    :param data: the dict to report on
    :param count: maximum number of entries to emit
    :return: None
    """
    # Slicing clamps to the number of available keys, so a short dict cannot raise IndexError.
    s_keys = sorted(data, reverse=top_reverse)[:max(count, 0)]
    lines = ["{}:{}".format(data[k], k) for k in s_keys]
    with open("top{}.txt".format(count), 'w', encoding='utf-8') as f:
        f.writelines(line + "\n" for line in lines)
    for line in lines:
        print(line)


def statistics(data: dict) -> None:
    """
    Write every entry of ``data`` to ./statistics.txt, ordered by key.

    Each entry becomes one line of the form "[key<TAB>value]".

    :param data: the dict to dump
    :return: None
    """
    lines = ["[{}\t{}]\n".format(key, data[key]) for key in sorted(data)]
    with open("./statistics.txt", 'w', encoding='utf-8') as out:
        out.write("".join(lines))


def output_repo_target_redundant(repo_target_dict: dict, redundant_dict: dict):
    """
    Print the redundancy report as a tree and save it to ./redundency.txt.

    The report has the shape
    repository [redundant_count]
    |----target
    |----|----redundant_dependency
    with repositories, targets and dependencies each in sorted order. Afterwards the
    per-repository counts are written through ``statistics`` and the repositories grouped
    by count are reported through ``top``.

    :param repo_target_dict: dict mapping a repository to the list of its targets that have redundant dependencies
    :param redundant_dict: dict mapping a target path (as found in the log) to its redundant dependencies
    :return: None
    """
    header = ("=" * 20 + "format" + "=" * 20 + "\n"
              "repository1 [redundant_count]\n"
              "|————target1\n"
              "|————|————redundant dependency 1\n"
              "|————|————redundant dependency 2\n"
              "respository2\n"
              "...\n"
              + "=" * 46 + "\n")

    print(header)
    repo_list = sorted(repo_target_dict.keys())
    repo_dict = dict()  # redundancy count per repository
    total_redundent_count = 0  # redundancy count over all repositories
    report_parts = [header]  # pieces of the text that goes into the file
    for repo in repo_list:
        repo_count = 0
        body_lines = []
        for target in sorted(repo_target_dict.get(repo)):
            redundant_list = sorted(redundant_dict.get(target))
            repo_count += len(redundant_list)
            body_lines.append("|————{}\n".format(target))
            body_lines.extend("|————|————{}\n".format(r) for r in redundant_list)
        repo_dict[repo] = repo_count
        total_redundent_count += repo_count
        # Only the heading is run through format(); the target and dependency names are
        # appended afterwards so that a "{" or "}" inside a name cannot break formatting.
        repo_report = "{} [{}]\n".format(repo, repo_count) + "".join(body_lines)
        print(repo_report.rstrip())
        report_parts.append(repo_report)
    with open("./redundency.txt", 'w', encoding='utf-8') as f:
        f.write("".join(report_parts))
    print("result has been saved to: ./redundency.txt")
    print("total repository: {}".format(len(repo_list)))
    print("total redundency: {}".format(total_redundent_count))
    statistics(repo_dict)
    print("statistics has been saved to: ./statistics.txt")
    count_repo_dict = reverse_dict(repo_dict)
    top(count_repo_dict, top_count)


def main():
    """
    Run the whole analysis: parse the options, find the redundant dependencies of every
    target, assign each target to a repository and write out the reports.
    """
    args = get_args()
    print("start analyzing...")
    # Redundant dependencies of every target, derived from the two log files
    redundant_dict = find_redundant(args.badlog, args.goodlog)
    # Paths of all sub-repositories listed in manifest.xml
    repository_list = find_target_repository_from_manifest(args.manifest)
    # Which repository each target belongs to, then the inverse: targets per repository
    target_repo_dict = get_target_repository_path(redundant_dict, repository_list)
    repo_target_dict = reverse_dict(target_repo_dict)
    # NOTE(review): this writes ./statistics.txt, and output_repo_target_redundant calls
    # statistics() again on the same file, so this first output is overwritten.
    statistics(repo_target_dict)
    output_repo_target_redundant(repo_target_dict, redundant_dict)
    print("completed.")


def test():
    """Ad-hoc check: print whether the relative path "../hello" exists."""
    hello_exists = os.path.exists("../hello")
    print(hello_exists)


# Script entry point: run the analysis only when this file is executed directly.
if __name__ == '__main__':
    # Debug hook: uncomment to also run the ad-hoc test() helper before the analysis.
    # test()
    main()
